#### Generate Random Forest Models/Results ####

# Libraries
#library(tidyverse)
#library(tidymodels)
#library(rio)
#library(here)
#library(ggthemes)

# data_pnas = import(here("Data","data_pnas.rds"))

#### PREP DATA ####

# Load Governor Party Data
# Reads Data/gov_parties.csv and runs dmy() over every column whose name ends
# in "_date". dmy() is presumably lubridate's day-month-year parser; the
# library() calls at the top of this file are commented out, so confirm it is
# attached by whatever script sources this one.
gov_parties = mutate(
  import(here("Data","gov_parties.csv")),
  across(ends_with("_date"), dmy)
)

# Basic Recoding of DVs
# Builds the modelling frame `data_dv` from `data_pnas`:
#   1. drops rows whose pid is "Independent" (filter() keeps only rows where
#      the condition is TRUE, so rows with a missing pid go too),
#   2. attaches the gov_parties row whose [inaug_date, end_date] window
#      contains the row's date, matched on state,
#   3. turns every violence* / norm_* column into a factor,
#   4. recodes demographics and attitudes,
#   5. keeps the modelling columns and drops any row with a missing value.
data_dv = data_pnas |> 
  filter(pid != "Independent") |> 
  # NOTE(review): join_by()'s between() is inclusive on both bounds by
  # default. If one governor's end_date equals the successor's inaug_date, a
  # respondent interviewed on that day matches both rows and is duplicated
  # without a warning -- confirm the spells in gov_parties.csv do not overlap.
  left_join(gov_parties, join_by(state == state,
                                 between(date, inaug_date, end_date))) |> 
  # Dependent Variables
  mutate(# Factor norms/violence individually
    across(starts_with("violence"), factor),
    across(starts_with("norm_"), factor)) |> 
  ## Independent Variables
  # Demographics
  mutate(
    # race: 1 -> "White", 2-9 -> "Non-white", anything else -> NA
    white = case_when(
      race == 1 ~ "White",
      race %in% 2:9 ~ "Non-white",
      .default = NA_character_),
    # educ: 1-4 -> "No College", 5-6 -> "College", anything else -> NA
    college = case_when(
      educ %in% 1:4 ~ "No College",
      educ %in% 5:6 ~ "College"),
    urban = case_match(
      urbanicity2,
      1 ~ "Big city",
      2 ~ "Smaller city",
      3 ~ "Suburban",
      4 ~ "Small town",
      5 ~ "Rural",
      .default = NA),
    male = ifelse(gender == 1, 1, 0),
    age = year(date) - birthyr,
    # Code 97 is treated as missing
    faminc = ifelse(faminc_new == 97, NA, faminc_new)
  ) |> 
  # Attitudes
  mutate(
    # 1 for the two endpoints of the 7-point pid scale, 0 for points 2-6
    strong = case_when(
      pid7 %in% c(1,7) ~ 1,
      pid7 %in% 2:6 ~ 0
    ),
    # Reverse-code newsint so that 1..4 becomes 3..0; other codes -> NA
    poli_interest = case_when(
      newsint %in% 1:4 ~ abs(newsint-4),
      .default = NA_integer_
    ),
    # 1 = respondent's party differs from the governor's, -1 = same party.
    # There is no 0 ("Independent") level: Independents were already removed
    # by the filter() at the top of this pipeline, so that branch could never
    # fire and has been dropped.
    # gov_party is presumably supplied by gov_parties via the join above.
    oppo_gov = case_when(
      (pid != gov_party) & (pid %in% c("Democrat","Republican")) ~ 1,
      pid == gov_party ~ -1,
      .default = NA_integer_
    )
  ) |> 
  select(pid, white, college, bornagain, urban, male, age, faminc,
         strong, poli_interest, outparty, inparty,
         oppo_gov, violence1re:violence6re, norm_judgesre:norm_loyaltyre) |> 
  # NOTE(review): listwise deletion over every selected column, including
  # violence1re and violence2re, which the model loop further down this file
  # does not use -- confirm that dropping on them is intended.
  drop_na()

# Train/Test Split
# The seed is set once, immediately before the only random step
# (initial_split), so the partition is reproducible.
set.seed(523)
# Pooled: 80% training / 20% testing, stratified on pid
data_split = initial_split(data_dv, prop = .8, strata = pid)
data_train = training(data_split)
data_test = testing(data_split)
export(data_split, here("Output","data_split.rds"))

# Democrat
# The party-specific splits reuse the pooled partition instead of
# re-sampling: each party's training rows are the pooled training rows for
# that party, and likewise for the test rows.
data_train_dem = data_train |> filter(pid == "Democrat")
data_test_dem = data_test |> filter(pid == "Democrat")
data_combined_dem = bind_rows(data_train_dem, data_test_dem)

# Row positions inside data_combined_dem: training rows come first, test rows
# follow. seq_len() is used rather than seq() so that an empty subset gives
# integer(0) instead of c(1, 0).
ind = list(analysis = seq_len(nrow(data_train_dem)),
           assessment = nrow(data_train_dem) + seq_len(nrow(data_test_dem)))

data_split_dem = make_splits(ind, data_combined_dem)

# Republican
data_train_rep = data_train |> filter(pid == "Republican")
data_test_rep = data_test |> filter(pid == "Republican")
data_combined_rep = bind_rows(data_train_rep, data_test_rep)

# Same layout as above; `ind` is deliberately overwritten.
ind = list(analysis = seq_len(nrow(data_train_rep)),
           assessment = nrow(data_train_rep) + seq_len(nrow(data_test_rep)))

data_split_rep = make_splits(ind, data_combined_rep)

#### TRAIN AND FIT MODELS ####
# Shared worker behind rf_fit_func() and rf_demos_func().
# Fits one random-forest classification spec to three samples -- pooled,
# Democrats only, Republicans only -- and collects variable importances.
#
# Arguments:
#   dv          Name (string) of the outcome column.
#   predictors  Character vector of predictor column names, in the order they
#               enter the formula. Must contain "pid", which the
#               party-specific recipes re-role.
# Returns an unnamed two-element list:
#   [[1]] unnamed list of the three last_fit() results, in the order
#         pooled, Democrat, Republican
#   [[2]] the vip::vi() tables of those three fits bound into one data frame,
#         with DependentVar and ModelSample columns added.
# Reads data_train, data_split, data_split_dem and data_split_rep from the
# calling (global) environment.
rf_fit_samples = \(dv, predictors){
  # Define Recipe
  f = as.formula(paste(dv, "~", paste(predictors, collapse = " + ")))
  rec_pool = recipe(f, data = data_train)
  # Party models: pid is moved out of the predictor set into an id role.
  # The party-specific splits used below already hold a single party, so the
  # step_filter() looks redundant; it is kept so the stored workflows are
  # unchanged.
  rec_dem = rec_pool |> 
    update_role(pid, new_role = "id variable") |> 
    step_filter(pid == "Democrat")
  rec_rep = rec_pool |> 
    update_role(pid, new_role = "id variable") |> 
    step_filter(pid == "Republican")
  # Set Model
  # NOTE(review): ranger advises against predicting from a forest grown with
  # importance = "impurity_corrected". If the test-set predictions/metrics
  # inside these last_fit() objects are ever reported, confirm against the
  # ranger docs and consider a separate forest for prediction.
  rf_model = rand_forest(mtry = 4,
                         min_n = 28,
                         trees = 1000) |> 
    set_engine("ranger",
               num.threads = parallel::detectCores(),
               importance = "impurity_corrected",
               seed = 613) |> 
    set_mode("classification")
  # Set workflow(s) and fit: one recipe/split pair per sample, always in the
  # order pooled, Democrat, Republican.
  recs = list(rec_pool, rec_dem, rec_rep)
  resamples = list(data_split, data_split_dem, data_split_rep)
  rf_fitted = map2(recs, resamples, \(rec, rs){
    workflow() |> 
      add_model(rf_model) |> 
      add_recipe(rec) |> 
      last_fit(rs)
  })
  # Output vip
  model_id = c("Pooled","Democrats", "Republicans")
  vip_out = map2_dfr(rf_fitted, model_id, \(fit, sample_label){
    fit |> 
      extract_fit_parsnip() |> 
      vip::vi() |> 
      mutate(DependentVar = dv,
             ModelSample = sample_label)
  })
  # Output list
  list(
    rf_fitted,
    vip_out
  )
}

# Full specification: the demographic predictors plus strong, poli_interest,
# oppo_gov, inparty and outparty.
# `dv` is the outcome column name as a string; see rf_fit_samples() for the
# shape of the return value.
rf_fit_func = \(dv){
  rf_fit_samples(dv, c("pid", "white", "college", "bornagain", "urban",
                       "male", "age", "faminc", "strong", "poli_interest",
                       "oppo_gov", "inparty", "outparty"))
}

# Demographics-only specification.
# `dv` is the outcome column name as a string; see rf_fit_samples() for the
# shape of the return value.
rf_demos_func = \(dv){
  rf_fit_samples(dv, c("pid", "white", "college", "bornagain", "urban",
                       "male", "age", "faminc"))
}

# Outcomes to model, defined once so the full and demographics-only runs
# cannot drift apart. Only violence3re-violence6re are modelled; violence1re
# and violence2re are kept in data_dv but not used here.
rf_dvs = c("norm_judgesre","norm_loyaltyre","norm_pollingre",
           "norm_executivere","norm_censorshipre", paste0("violence",3:6,"re"))

# One result per outcome; each result is list(<three fits>, <importance table>)
rf_fit_all = map(rf_dvs, rf_fit_func)
rf_demos_all = map(rf_dvs, rf_demos_func)

# Element [[2]] of each result is its importance table: stack them across
# outcomes into one data frame per specification.
rf_fit_vip = map(rf_fit_all, \(res) res[[2]]) |> bind_rows()
rf_demos_vip = map(rf_demos_all, \(res) res[[2]]) |> bind_rows()

export(rf_fit_vip, here("Output","randforest_vip.rds"))
export(rf_demos_vip, here("Output","randforest_demos_vip.rds"))

# Element [[1]] of each result is the list of three fitted models: flatten
# one level so the export is a single list of fits (three per outcome, in
# outcome order).
rf_fit_mods = map(rf_fit_all, \(res) res[[1]]) |> list_flatten()
rf_demos_mods = map(rf_demos_all, \(res) res[[1]]) |> list_flatten()

export(rf_fit_mods, here("Output","randforest_mods.rds"))
export(rf_demos_mods, here("Output","randforest_demos_mods.rds"))